#################################################################################
# Filename: 2_round1_appendices.R
# Description: Takes the cleaned data from 1_round1_loaddata and creates 
# appendix materials: 
#   (1) covariate balance tests on the round 1 survey sample 
#   (2) regressions for main outcomes (incl. multiple comparisons correction)
#   (3) marginal effects plots 
#   (4) differential attrition analyses
#################################################################################
### Load library dependencies 
library(tidyverse)
library(broom)
library(DeclareDesign)
library(estimatr)
library(rio)
library(ggplot2)
library(gridExtra)
library(interflex)
library(stargazer)

# Import cleaned data from 1_round1_loaddata
dat <- import("Round_1_clean.Rdata") 

#################################################################################
# (1) Balance tests (Appendix B)
#################################################################################

# Figure A3 , upper panel
# Covariate balance for round 1 
# Comparison groups: tv drama / newscast 

dat$kangriju <- 0
dat$kangriju[grep("抗日电视剧", dat$Q43)] <- 1


pdf("balance_tests_round1.pdf", width=10, height=3.5)
par(mfrow = c(1,3),
    oma=c(1,1,0, 0) ,
    mar=c(4,1,1, 1) )

plot(1, xlim=c(-0.6, 0.6), ylim=c(1, 7), axes=F,  xlab="")

text(y = 7:1, x= 0.6,par("usr")[1], labels =  c("Age", "Education Level", "Urban Resident", "CCP Member", "Income", "Gender", "Hours Watching TV"), srt = 0, pos = 2, xpd = TRUE, cex=1.9)


test1 <-  t.test(dat$age[dat$clip=="leopard"], dat$age[dat$clip=="none"])
test2 <-  t.test(dat$education[dat$clip=="leopard"], dat$education[dat$clip=="none"])
test3 <-  t.test(dat$urban.residence[dat$clip=="leopard"], dat$urban.residence[dat$clip=="none"])
test4 <-  t.test(dat$party[dat$clip=="leopard"], dat$party[dat$clip=="none"])
test5 <-  t.test(dat$income[dat$clip=="leopard"], dat$income[dat$clip=="none"])
test6 <-  t.test(dat$gender[dat$clip=="leopard"], dat$gender[dat$clip=="none"])
test7 <-  t.test(dat$Q14[dat$clip=="leopard"], dat$Q14[dat$clip=="none"])


plot(100, pch=16, xlim=c(-0.5, 0.5), ylim=c(1, 7), col="black", xlab="Treatment: TV Drama", ylab="", yaxt= "n", cex.lab=1.6) 
abline(v=0, col="darkgray", lwd=2)

segments(test1$conf.int[1], 7, test1$conf.int[2], 7, col= 'black', lwd=2)
segments(test2$conf.int[1], 6, test2$conf.int[2], 6, col= 'black', lwd=2)
segments(test3$conf.int[1], 5, test3$conf.int[2], 5, col= 'black', lwd=2)
segments(test4$conf.int[1],4, test4$conf.int[2], 4,  col= 'black', lwd=2)
segments(test5$conf.int[1], 3, test5$conf.int[2], 3, col= 'black', lwd=2)
segments(test6$conf.int[1], 2, test6$conf.int[2], 2, col= 'black', lwd=2)
segments(test7$conf.int[1], 1, test7$conf.int[2], 1, col= 'black', lwd=2)

points(test1$estimate[1]-test1$estimate[2], 7, pch=16, col= 'black', cex=2)
points(test2$estimate[1]-test2$estimate[2], 6, pch=16, col= 'black', cex=2)
points(test3$estimate[1]-test3$estimate[2], 5, pch=16, col= 'black', cex=2)
points(test4$estimate[1]-test4$estimate[2], 4, pch=16, col= 'black', cex=2)
points(test5$estimate[1]-test5$estimate[2], 3, pch=16, col= 'black', cex=2)
points(test6$estimate[1]-test6$estimate[2], 2, pch=16, col= 'black', cex=2)
points(test7$estimate[1]-test7$estimate[2], 1, pch=16, col= 'black', cex=2)


test1 <-  t.test(dat$age[dat$clip=="yongbu"], dat$age[dat$clip=="none"])
test2 <-  t.test(dat$education[dat$clip=="yongbu"], dat$education[dat$clip=="none"])
test3 <-  t.test(dat$urban.residence[dat$clip=="yongbu"], dat$urban.residence[dat$clip=="none"])
test4 <-  t.test(dat$party[dat$clip=="yongbu"], dat$party[dat$clip=="none"])
test5 <-  t.test(dat$income[dat$clip=="yongbu"], dat$income[dat$clip=="none"])
test6 <-  t.test(dat$gender[dat$clip=="yongbu"], dat$gender[dat$clip=="none"])
test7 <-  t.test(dat$Q14[dat$clip=="yongbu"], dat$Q14[dat$clip=="none"])


plot(100, pch=16, xlim=c(-0.5, 0.5), ylim=c(1, 7), col="black", xlab="Treatment: Newscast", ylab="", yaxt= "n", cex.lab=1.6) 
abline(v=0, col="darkgray", lwd=2)

segments(test1$conf.int[1], 7, test1$conf.int[2], 7, col= 'black', lwd=2)
segments(test2$conf.int[1], 6, test2$conf.int[2], 6, col= 'black', lwd=2)
segments(test3$conf.int[1], 5, test3$conf.int[2], 5, col= 'black', lwd=2)
segments(test4$conf.int[1],4, test4$conf.int[2], 4,  col= 'black', lwd=2)
segments(test5$conf.int[1], 3, test5$conf.int[2], 3, col= 'black', lwd=2)
segments(test6$conf.int[1], 2, test6$conf.int[2], 2, col= 'black', lwd=2)
segments(test7$conf.int[1], 1, test7$conf.int[2], 1, col= 'black', lwd=2)

points(test1$estimate[1]-test1$estimate[2], 7, pch=16, col= 'black', cex=2)
points(test2$estimate[1]-test2$estimate[2], 6, pch=16, col= 'black', cex=2)
points(test3$estimate[1]-test3$estimate[2], 5, pch=16, col= 'black', cex=2)
points(test4$estimate[1]-test4$estimate[2], 4, pch=16, col= 'black', cex=2)
points(test5$estimate[1]-test5$estimate[2], 3, pch=16, col= 'black', cex=2)
points(test6$estimate[1]-test6$estimate[2], 2, pch=16, col= 'black', cex=2)
points(test7$estimate[1]-test7$estimate[2], 1, pch=16, col= 'black', cex=2)

dev.off()


#################################################################################
# (2) Regressions for main outcomes
#################################################################################

# Table A1: Round One: OLS Results for Main Outcomes

test1 <- lm(scale(patriot_pca)~clip, data=dat )
test2 <- lm(scale(patriot_pca)~clip+age+education+urban.residence+party+income+gender+Q14, data=dat )
test3 <- lm(scale(antiforeign_pca)~clip, data=dat )
test4 <- lm(scale(antiforeign_pca)~clip+age+education+urban.residence+party+income+gender+Q14, data=dat )
test5 <- lm(scale(militant_pca)~clip, data=dat )
test6 <- lm(scale(militant_pca)~clip+age+education+urban.residence+party+income+gender+Q14, data=dat )

summary(test1)
require(stargazer)

stargazer(test1, test2, test3, test4, test5, test6,
          covariate.labels = c("Anti-Japan television drama", "Anti-Japan newscast", "Age", "Education", "Urban resident", "Party", "Income", "Gender", "Hours watching TV"),
          dep.var.labels = c("Pro-China patriotism", "Anti-foreign nationalism", "Hawkishness"),
          omit.stat=c("f", "ser"))

# Table A2: Round One: OLS Results for Main Outcomes

test1 <- lm(Q19_1~clip, data=dat )
test2 <- lm(Q19_1~clip+age+education+urban.residence+party+income+gender+Q14, data=dat )
test3 <- lm(protest.gov~clip, data=dat )
test4 <- lm(protest.gov~clip+age+education+urban.residence+party+income+gender+Q14, data=dat )
test5 <- lm(protest.japan.signed~clip, data=dat )
test6 <- lm(protest.japan.signed~clip+age+education+urban.residence+party+income+gender+Q14, data=dat )

summary(test1)
require(stargazer)

stargazer(test1, test2, test3, test4, test5, test6,
          covariate.labels = c("Anti-Japan television drama", "Anti-Japan news cast", "Age", "Education", "Urban resident", "Party", "Income", "Gender", "Hours watching TV"),
          dep.var.labels = c("Anger", "Would protest government", "Signed petition against Japan"),
          omit.stat=c("f", "ser"))


# Corrected p-value for multiple comparisons (Appendix G)
lm1 <- lm(Q19_1~clip, data=dat )
lm2 <- lm(antiforeign_pca~clip, data=dat )
lm3 <- lm(militant_pca~clip, data=dat )
lm4 <- lm(patriot_pca~clip, data=dat )
lm5 <- lm(protest.japan.signed~clip, data=dat )
lm6 <- lm(protest.gov~clip, data=dat )

p <- c(summary(lm1)$coefficients[2,4],
       summary(lm1)$coefficients[3,4],
       summary(lm2)$coefficients[2,4],
       summary(lm2)$coefficients[3,4],
       summary(lm3)$coefficients[2,4],
       summary(lm3)$coefficients[3,4],
       summary(lm4)$coefficients[2,4],
       summary(lm4)$coefficients[3,4],
       summary(lm5)$coefficients[2,4],
       summary(lm5)$coefficients[3,4],
       summary(lm6)$coefficients[2,4],
       summary(lm6)$coefficients[3,4])

p.adjust(p, method = "BH", n = length(p))


#################################################################################
# (3) Marginal effects plots
#################################################################################

dat$age_graphics <- ifelse(dat$age==1, 18, dat$age*5 + 15 )

pdf("r1_age_anger_soft_ME.pdf")
inter.binning(Y = "Q19_1", D = "soft", X = "age_graphics", data = dat, nbins=5, 
              Xlabel="Age", Dlabel = "Anti-japanese \n television drama", Ylabel="anger", main=NULL,
              show.grid=F,theme.bw=T, vartype = "robust", na.rm=T)$graph
dev.off()

pdf("r1_age_anger_hard_ME.pdf")
inter.binning(Y = "Q19_1", D = "hard", X = "age_graphics", data = dat, nbins=5,
              Xlabel="Age", Dlabel = "Anti-Japanese \n news broadcast", Ylabel="anger", main=NULL,
              show.grid=F,theme.bw=T, vartype = "robust", na.rm=T)$graph
dev.off()

pdf("r1_age_antif_soft_ME.pdf")
inter.binning(Y = "antiforeign_pca", D = "soft", X = "age_graphics", data = dat,  nbins=7, 
              Xlabel="Age", Dlabel = "Anti-japanese \n television drama", Ylabel="anti-foreign sentiment", main=NULL,
              show.grid=F,theme.bw=T, vartype = "robust", na.rm=T)$graph
dev.off()

pdf("r1_age_antif_hard_ME.pdf")
inter.binning(Y = "antiforeign_pca", D = "hard", X = "age_graphics", data = dat, nbins=7, 
              Xlabel="Age", Dlabel = "Anti-japanese \n news broadcast", Ylabel="anti-foreign sentiment", main=NULL,
              show.grid=F,theme.bw=T, vartype = "robust", na.rm=T)$graph
dev.off()



#################################################################################
# (4) Differential Attrition analysis 
#################################################################################

# Look at differential attrition by clip (these are the percentages displayed in Table A6)
mean(dat$incomplete)                                # overall
mean(dat$incomplete[which(dat$clip=="none")])       # no clip
mean(dat$incomplete[which(dat$clip=="yongbu")])     # newscast
mean(dat$incomplete[which(dat$clip=="leopard")])    # tv drama


# Table A8: Trimming Bounds for estimated effects. 

dat$patriot_pca <- as.numeric(dat$patriot_pca)
dat$complete <- 0
dat$complete[dat$incomplete==0] <- 1

hard <- dat[is.na(dat$hard)==FALSE&is.na(dat$income)==FALSE&is.na(dat$gender)==FALSE,]
soft <- dat[is.na(dat$soft)==FALSE&is.na(dat$income)==FALSE&is.na(dat$gender)==FALSE,]

bounds <- (mean(hard$complete[hard$hard==0])-mean(hard$complete[hard$hard==1]))/mean(hard$complete[hard$hard==1])

lower.bound <- quantile(hard$patriot_pca[hard$hard==1], c(bounds), na.rm=T)
upper.bound <- quantile(hard$patriot_pca[hard$hard==1], c(1-bounds), na.rm=T)
summary(lm(patriot_pca~hard, dat=hard[which((hard$patriot_pca>lower.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]
summary(lm(patriot_pca~hard, dat=hard[which((hard$patriot_pca<upper.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]

lower.bound <- quantile(hard$antiforeign_pca[hard$hard==1], c(bounds), na.rm=T)
upper.bound <- quantile(hard$antiforeign_pca[hard$hard==1], c(1-bounds), na.rm=T)
summary(lm(antiforeign_pca~hard, dat=hard[which((hard$antiforeign_pca>lower.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]
summary(lm(antiforeign_pca~hard, dat=hard[which((hard$antiforeign_pca<upper.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]

lower.bound <- quantile(hard$militant_pca[hard$hard==1], c(bounds), na.rm=T)
upper.bound <- quantile(hard$militant_pca[hard$hard==1], c(1-bounds), na.rm=T)
summary(lm(militant_pca~hard, dat=hard[which((hard$militant_pca>lower.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]
summary(lm(militant_pca~hard, dat=hard[which((hard$militant_pca<upper.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]


lower.bound <- quantile(hard$Q19_1[hard$hard==1], c(bounds), na.rm=T)
upper.bound <- quantile(hard$Q19_1[hard$hard==1], c(1-bounds), na.rm=T)
summary(lm(Q19_1~hard, dat=hard[which((hard$Q19_1>lower.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]
summary(lm(Q19_1~hard, dat=hard[which((hard$Q19_1<upper.bound&hard$hard==1)|hard$hard==0),]))$coef[2,1]


bounds <- (mean(soft$complete[soft$soft==0])-mean(soft$complete[soft$soft==1]))/mean(soft$complete[soft$soft==1])

lower.bound <- quantile(soft$patriot_pca[soft$soft==1], c(bounds), na.rm=T)
upper.bound <- quantile(soft$patriot_pca[soft$soft==1], c(1-bounds), na.rm=T)
summary(lm(patriot_pca~soft, dat=soft[which((soft$patriot_pca>lower.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]
summary(lm(patriot_pca~soft, dat=soft[which((soft$patriot_pca<upper.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]

lower.bound <- quantile(soft$antiforeign_pca[soft$soft==1], c(bounds), na.rm=T)
upper.bound <- quantile(soft$antiforeign_pca[soft$soft==1], c(1-bounds), na.rm=T)
summary(lm(antiforeign_pca~soft, dat=soft[which((soft$antiforeign_pca>lower.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]
summary(lm(antiforeign_pca~soft, dat=soft[which((soft$antiforeign_pca<upper.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]

lower.bound <- quantile(soft$militant_pca[soft$soft==1], c(bounds), na.rm=T)
upper.bound <- quantile(soft$militant_pca[soft$soft==1], c(1-bounds), na.rm=T)
summary(lm(militant_pca~soft, dat=soft[which((soft$militant_pca>lower.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]
summary(lm(militant_pca~soft, dat=soft[which((soft$militant_pca<upper.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]


lower.bound <- quantile(soft$Q19_1[soft$soft==1], c(bounds), na.rm=T)
upper.bound <- quantile(soft$Q19_1[soft$soft==1], c(1-bounds), na.rm=T)
summary(lm(Q19_1~soft, dat=soft[which((soft$Q19_1>lower.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]
summary(lm(Q19_1~soft, dat=soft[which((soft$Q19_1<upper.bound&soft$soft==1)|soft$soft==0),]))$coef[2,1]



# Table A7: Reweighted results to account for missingness, 
# using inverse propensity score weighting as described in Gerber and Green (2012).
hard$observed <- 1 - (is.na(hard$patriot_pca))
soft$observed <- 1 - (is.na(soft$patriot_pca))


probobs <- glm(observed~hard*age+hard*education+hard*urban.residence+hard*party+hard*income+hard*gender, 
               data=hard, family=binomial(link="logit"))$fitted

summary(probobs[dat$hard==0])
summary(probobs[dat$hard==1])

wt <- 1/probobs
sel_valid <- hard$observed ==1
table(sel_valid)

lm1 <- lm(patriot_pca~hard, data=hard, subset=sel_valid, weights = wt)
lm2 <- lm(antiforeign_pca~hard, data=hard, subset=sel_valid, weights = wt)
lm3 <- lm(militant_pca~hard, data=hard, subset=sel_valid, weights = wt)

probobs <- glm(observed~soft*age+soft*education+soft*urban.residence+soft*party+soft*income+soft*gender, 
               data=soft, family=binomial(link="logit"))$fitted

summary(probobs[dat$hard==0])
summary(probobs[dat$hard==1])

wt <- 1/probobs

sel_valid <- soft$observed ==1
table(sel_valid)

lm4 <- lm(patriot_pca~soft, data=soft, subset=sel_valid, weights = wt)
lm5 <- lm(antiforeign_pca~soft, data=soft, subset=sel_valid, weights = wt)
lm6 <- lm(militant_pca~soft, data=soft, subset=sel_valid, weights = wt)


stargazer(lm1, lm2, lm3, lm4, lm5, lm6)

